#' ---
#' title: "Recoding ICPP09 Data"
#' author: "Gento Kato & Fan Lu"
#' date: "August 23, 2023"
#' ---
#' 
#' 
#' # Preparation 
#' 

## Clean Up Space
## NOTE: this removes every object from the current workspace, so run the
## script in a fresh R session.
rm(list=ls())

## Set Working Directory (Automatically) ##
## The document path can only be queried inside RStudio. When the script is
## run elsewhere (e.g. Rscript or knitr::spin), the current working directory
## is left untouched instead of stopping with an error.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
}

## Import Relevant Data

## Data Directory:
## ** Location of 0838_zenkoku.sav downloaded from SSJDA
## ** If downloaded to the same folder as this file, set filedir <- "./"
filedir <- "./"

### 2008 ICPP Data
## library() stops right here when haven is not installed; require() would
## only warn and let read_sav() fail afterwards.
library(haven)
icpp09 <- read_sav(paste0(filedir,"0838_zenkoku.sav"), encoding="CP932")
attr(icpp09$REGION,"labels") # labels of communities

#'
#' # Recoding 
#'

## Load the psych package (psych::alpha() and psych::fa() are used below).
## library() fails immediately if the package is missing; require() would
## only return FALSE with a warning.
library(psych)

# Initiate New Data Set (one row per respondent, keyed by the original id)
d <- data.frame(id = icpp09$id)

#'
#' ## DEPENDENT variables of interest
#' 
#' ### The local election suffrage should be granted to foreigners.
#' 
#' * Original: 1=Strongly agree 5=Strongly disagree 6=DK 7=NA
#' * Recoded: 0=Strongly disagree, 0.5=Neither/DK, 1=Strongly agree, Missing=NA

# Original Variable
tmp <- icpp09$q15_e
table(tmp, useNA="always")
# Recoded Variable
## Strip the SPSS label attributes, then apply the documented treatment of the
## non-substantive codes before rescaling: 6 (DK) joins the midpoint and
## 7 (NA) becomes missing. Both steps are no-ops if those codes were already
## read in as NA. Without them, 6/7 would be rescaled to -0.25/-0.5.
tmp <- ifelse(is.na(tmp), NA, tmp)
tmp[tmp %in% 6] <- 3
tmp[tmp %in% 7] <- NA
## Reverse and rescale 1..5 to 0..1 (1=Strongly agree -> 1, 5=Strongly disagree -> 0)
d$foreignsuff <- (5-tmp)/4
table(d$foreignsuff, useNA="always")
## Three-category version: 1=Neither (midpoint), 2=Disagree (<0.5), 3=Agree (>0.5)
d$foreignsuff3 <- ifelse(d$foreignsuff==0.5,1,ifelse(d$foreignsuff>0.5,3,2))
## Levels are given explicitly so that the labels stay attached to the right
## codes (and factor() does not fail) even if one category is empty.
d$foreignsuff3 <- factor(d$foreignsuff3, levels=1:3,
                         labels=c("Neither","Disagree","Agree"))
table(d$foreignsuff3, useNA="always")
## Same variable with the categories in substantive order
d$foreignsuff3x <- factor(d$foreignsuff3, levels=c("Disagree","Neither","Agree"))
table(d$foreignsuff3x, useNA="always")

## Alternative Measurement
## One-factor score over four q15 items (b, c, e, f) in place of the single
## suffrage item.

## Reliability check (original note: very high alpha)
psych::alpha(icpp09[, c("q15_b", "q15_c", "q15_e", "q15_f")])

## Row means are tabulated for inspection only; tmp is overwritten below.
tmp <- rowMeans(icpp09[, c("q15_b", "q15_c", "q15_e", "q15_f")])
table(tmp, useNA = "always")

## Superseded additive-index version, kept for reference:
# d$foreignrights <- (5-ifelse(is.na(tmp), NA, tmp))/3
# table(d$foreignrights, useNA="always")
# d$foreignrights2 <- ifelse(d$foreignrights>0.5,1,0)
# table(d$foreignrights2, useNA="always")

## Factor scores, sign-flipped.
## NOTE(review): the flip presumably makes higher values mean more support
## (items are coded 1=agree) -- confirm against the estimated loadings.
tmp <- psych::fa(icpp09[, c("q15_b", "q15_c", "q15_e", "q15_f")])
d$foreignrights <- -as.numeric(tmp$scores[, 1])

## Binary split at zero (1 = score above 0, 0 = otherwise, NA stays NA)
d$foreignrights2 <- as.numeric(d$foreignrights > 0)
table(d$foreignrights2, useNA = "always")

#'
#' ### Increase in immigrants
#'

## Original item
tmp <- icpp09$q11
table(tmp, useNA = "always")

## Reverse and rescale: code 1 -> 1, code 4 -> 0 (label attributes dropped)
d$immigincrease <- (4 - ifelse(is.na(tmp), NA, tmp)) / 3
table(d$immigincrease, useNA = "always")

## Binary version: 1 if above the 0.5 midpoint, 0 otherwise, NA stays NA
d$immigincrease2 <- as.numeric(d$immigincrease > 0.5)
table(d$immigincrease2, useNA = "always")

## Alternative Measurement
## One-factor score over six q12 items (q12_d is deliberately left out).

## Reliability check (original note: very high alpha)
psych::alpha(icpp09[, c("q12_a", "q12_b", "q12_c", # "q12_d",
                        "q12_e", "q12_f", "q12_g")])

## Row means are tabulated for inspection only; tmp is overwritten below.
tmp <- rowMeans(icpp09[, c("q12_a", "q12_b", "q12_c", # "q12_d",
                           "q12_e", "q12_f", "q12_g")])
table(tmp, useNA = "always")

## Superseded additive-index version, kept for reference:
# d$immigincrease_alt <- (4-ifelse(is.na(tmp), NA, tmp))/3
# table(d$immigincrease_alt, useNA="always")
# d$immigincrease2_alt <- ifelse(d$immigincrease_alt>0.5,1,0)
# table(d$immigincrease2_alt, useNA="always")

## Fit the factor model, print it, and inspect the score distribution
tmp <- psych::fa(icpp09[, c("q12_a", "q12_b", "q12_c", # "q12_d",
                            "q12_e", "q12_f", "q12_g")])
tmp
summary(tmp$scores[, 1])

## Sign-flipped factor score and its binary split at zero
## NOTE(review): the direction of the flip depends on the item coding and on
## the estimated loadings -- confirm against the printed model.
d$immigincrease_alt <- -as.numeric(tmp$scores[, 1])
d$immigincrease2_alt <- as.numeric(d$immigincrease_alt > 0)
table(d$immigincrease2_alt, useNA = "always")

#'
#' ## PREDICTORS
#' 
#' ### Education (Ordinal)
#' 
#' * Recoded: 1="<=SHS", 2=">SHS & <College(4yr)" (Junior College/Vocational School), 3=">=College(4yr)"
#' 

# Original
tmp <- icpp09$q33
table(tmp, useNA="always")
# Recoded
## 1 = codes 1,2,6,7,8; 2 = codes 3,9,10; 3 = every other non-missing code.
## NOTE(review): any DK/refusal code that is not already NA would fall into
## category 3 -- check the table above.
d$edu <- ifelse(is.na(tmp),NA,
                ifelse(tmp%in%c(1,2,6,7,8),1,
                       ifelse(tmp%in%c(3,9,10),2,3)))
# Make it a Factor
## Levels are spelled out so that each label stays tied to its code (and
## factor() does not fail) even if a category has no respondents.
d$edu <- factor(d$edu, levels = 1:3,
                labels = c("<=SHS",
                           ">SHS & <College(4yr)",
                           ">=College(4yr)"))
table(d$edu, useNA="always")

# Education Treatment (1 = 4-year college or more, 0 = otherwise, NA stays NA)
d$edu2 <- ifelse(d$edu==">=College(4yr)",1,0)
table(d$edu2, useNA="always")

#' 
#' ### Gender
#' 
#' * Original: 1=male 2=female 3=NA
#' * Recoded: 0=male, 1=female
#' 

# Original
tmp <- icpp09$q04
table(tmp, useNA="always")
# Recoded
## Only the two substantive codes are mapped (1=male -> 0, 2=female -> 1).
## Anything else, including the documented code 3 (NA), becomes missing
## instead of being silently counted as male.
d$female <- ifelse(tmp==2, 1, ifelse(tmp==1, 0, NA))
table(d$female, useNA="always")
## Complementary indicator (1=male, 0=female)
d$male <- 1 - d$female

#'
#' ### Age
#'
#' * Recoded (Categorical): Young (<=30s), Middle Aged (40-50s), Elder (>=60s)

# Original (age in years, copied as is)
tmp <- icpp09$q05
table(tmp, useNA = "always")
d$age <- tmp

## Approximate birth year: survey year (2009) minus age.
## The survey was fielded Oct-Dec, so birthdays late in the year are off by one.
d$bornyr <- 2009 - d$age

## Academic year of entering college, assuming respondents had already turned
## 19 in the year they entered (the survey ran October-December).
d$univyr <- 2009 - (d$age - 19)
unique(d$univyr[d$age %in% 19]) # If you are 19, 2009 is the year to enter

# Recoded Categorical
## Three age groups with cut points at 40 and 60; missing ages stay NA.
d$agecat <- ifelse(d$age >= 60, "Elder (>=60s)",
                   ifelse(d$age >= 40, "Middle Aged (40-50s)",
                          "Young (<=30s)"))
## Turn the labels into a factor ordered from youngest to oldest
d$agecat <- factor(d$agecat,
                   levels = c("Young (<=30s)",
                              "Middle Aged (40-50s)",
                              "Elder (>=60s)"))
table(d$agecat, useNA = "always")

# Recoded Cohort
## Cohort I (-1975 Expansion) 
## Cohort II (1975-1990 Stagnation) 
## Cohort III (1990-2000 Expansion) 
## Cohort IV (2000- Universal) 
## Cohorts are cut on d$univyr; respondents with univyr >= 2010 stay NA.
d$cohort <- NA
d$cohort[which(d$univyr<1975)] <- 1
d$cohort[which(d$univyr>=1975 & d$univyr<1990)] <- 2
d$cohort[which(d$univyr>=1990 & d$univyr<2000)] <- 3
d$cohort[which(d$univyr>=2000 & d$univyr<2010)] <- 4
# d$cohort[which(d$univyr>=2010)] <- 4
## A factor variable
## Levels are listed explicitly so that every label stays attached to its
## cohort code (and factor() does not fail) even if a cohort is empty.
## NOTE(review): the first two labels put 1975 in Cohort I, while the code
## above assigns univyr == 1975 to Cohort II -- confirm which is intended.
d$cohort <- factor(d$cohort, levels=1:4,
                   labels=c("Cohort I (18+ in -1975)",
                            "Cohort II (18+ in 1976-1989)",
                            "Cohort III (18+ in 1990-99)",
                            "Cohort IV (18+ in 2000-09)"))
table(d$cohort, useNA="always")

#'
#' ### Income
#'

# Original
## Raw household income item; tmp is reused further below when the income
## percentile and the income categories are built, so it must not be
## overwritten in between.
tmp <- icpp09$q36
table(tmp, useNA="always")
# Recoded
## Percentile Conversion Function
## Convert an ordered categorical variable into midpoint cumulative shares.
##   old.var     : vector of category codes
##   missing.val : codes to be treated as missing (set to NA)
## Each non-missing category is replaced by the midpoint of the interval it
## occupies on the cumulative distribution (0-1 scale); missing values stay NA.
## The result has the same length as old.var and carries the category codes
## as names.
convper <- function(old.var, missing.val) {
  vals <- old.var
  vals[vals %in% missing.val] <- NA
  # Share of each category among the non-missing observations
  freq <- table(vals)
  cum_share <- cumsum(freq / sum(freq))
  # Step back by half of each category's own share to land on its midpoint
  mid_share <- cum_share - diff(c(0, cum_share)) / 2
  # Look up every observation's midpoint by its category code
  mid_share[match(vals, names(mid_share))]
}
## Midpoint percentile of household income; codes 88 and 99 count as missing
d$income <- convper(tmp, c(88,99))
table(d$income, useNA="always")

## Terciles of the percentile score plus an explicit "Missing" category
d$incomecat <- NA
d$incomecat[which(d$income<=0.33)] <- "Low"
d$incomecat[which(d$income>0.33 & d$income<=0.67)] <- "Middle"
d$incomecat[which(d$income>0.67)] <- "High"
## Key "Missing" on the recoded variable rather than on the raw item, so that
## the codes dropped by convper() (88, 99) are covered as well as raw NAs.
d$incomecat[which(is.na(d$income))] <- "Missing"
d$incomecat <- factor(d$incomecat, levels=c("Low","Middle","High","Missing"))
table(d$incomecat, useNA="always") 

#'
#' ### Jobs
#'

## Working Status
tmp <- icpp09$q34_1
table(tmp, useNA = "always")

## Collapse the original codes into three groups:
##   9 or NA    -> missing
##   1, 3, 4, 5 -> "Self-Employed/Full-Time/Managerial"
##   2, 7       -> "Student/Part-Time"
##   all others -> "Not Employed"
d$workstat <- ifelse(tmp %in% 9 | is.na(tmp), NA,
                     ifelse(tmp %in% c(1, 3, 4, 5),
                            "Self-Employed/Full-Time/Managerial",
                            ifelse(tmp %in% c(2, 7),
                                   "Student/Part-Time",
                                   "Not Employed")))

## "Not Employed" is the first (reference) level
d$workstat <- factor(d$workstat,
                     levels = c("Not Employed",
                                "Student/Part-Time",
                                "Self-Employed/Full-Time/Managerial"))
table(d$workstat)

## Employment dummy: 0 = "Not Employed", 1 = any other group, NA stays NA
d$employed <- as.numeric(d$workstat != "Not Employed")
table(d$employed)

#'
#' ### Marital Status
#'

## Original item
table(icpp09$q32, useNA = "always")

## Dummy: 1 if q32 equals 1, 0 for any other non-missing code, NA stays NA
d$married <- as.numeric(icpp09$q32 == 1)
table(d$married)

#'
#' ### Parent's Nationality
#'

## Original item
table(icpp09$q35, useNA="always")

## Dummy: 1 if q35 is anything other than 1, 0 if it equals 1, NA stays NA
## NOTE(review): any DK/refusal code that is not already NA is counted as 1.
d$foreignparent <- ifelse(icpp09$q35!=1,1,0)
## Check the variable just created (this line used to tabulate d$married)
table(d$foreignparent)

#'
#' ### Urban Rural
#'

## Original community codes
table(icpp09$REGION, useNA = "always")

## Map each community code to a 1-5 urbanness class; unlisted codes stay NA.
d$urban <-
  ifelse(icpp09$REGION %in% c(18,21,22,23,24,25,31,32,4,5,6,7,13), 1, # large city
  ifelse(icpp09$REGION %in% c(26,33,34,35,36,2,8,9), 2,    # suburb of a large city
  ifelse(icpp09$REGION %in% c(1,11,12,16,17), 3,           # mid-sized city
  ifelse(icpp09$REGION %in% c(3,14,15), 4,                 # small city
  ifelse(icpp09$REGION %in% c(10), 5,                      # town/village
         NA)))))

## Reverse and rescale to 0-1 (1 = large city, 0 = town/village)
d$urban <- (5 - d$urban) / 4
table(d$urban, useNA = "always")

#'
#' ### Area
#'

## Keep the original community code as the area identifier
d$area <- icpp09$REGION

#'
#' # Saving Data
#'

#+ eval=FALSE
## Write the recoded data set to the current working directory
saveRDS(object = d, file = "data_icpp09_v7.rds")